Matplotlib is the most common charting package; see its documentation for details and its examples for inspiration.
# Notebook-wide plotting setup used by the Matplotlib cells below.
import matplotlib.pyplot as plt
# IPython magic: render Matplotlib figures inline in the notebook output.
%matplotlib inline
# Default size applied to every figure created after this point.
plt.rcParams["figure.figsize"] = (12,6)
%config InlineBackend.figure_format='retina' # adapt plots for retina displays
import matplotlib.pyplot as plt
# Two zig-zag series over the same x positions; y2 is y1 shifted up by one.
x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
y1 = [1, 3, 5, 3, 1, 3, 5, 3, 1]
y2 = [2, 4, 6, 4, 2, 4, 6, 4, 2]

# Each call adds one line to the current axes; the labels feed plt.legend().
plt.plot(x, y1, label="line L")
plt.plot(x, y2, label="line H")

plt.xlabel("x axis")
plt.ylabel("y axis")
plt.title("Line Graph Example")
plt.legend()
plt.show()
import matplotlib.pyplot as plt
# x positions 4 and 6 appear in both series, so the bars drawn there overlap.
x1 = [1, 3, 4, 5, 6, 7, 9]
y1 = [4, 7, 2, 4, 7, 8, 3]
x2 = [2, 4, 6, 8, 10]
y2 = [5, 6, 2, 6, 2]

# Colors: https://matplotlib.org/api/colors_api.html
# The green series is drawn second, so it is painted on top of the blue one.
plt.bar(x1, y1, label="Blue Bar", color='b')
plt.bar(x2, y2, label="Green Bar", color='g')

plt.xlabel("bar number")
plt.ylabel("bar height")
plt.title("Bar Chart Example")
plt.legend()
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Use numpy to generate a bunch of random data in a bell curve around 5.
n = 5 + np.random.randn(1000)

# Raw view: one bar per sample, positioned by the sample's index.
m = list(range(len(n)))
plt.bar(m, n)
plt.title("Raw Data")
plt.show()

# Distribution of the same samples, grouped into 20 bins.
plt.hist(n, bins=20)
plt.title("Histogram")
plt.show()

# Same bins, but each bar also includes the counts of all bins before it.
plt.hist(n, cumulative=True, bins=20)
plt.title("Cumulative Histogram")
plt.show()
import matplotlib.pyplot as plt
# Three point sets: a flat row of default markers plus two series sharing x2.
x1 = [2, 3, 4]
y1 = [5, 5, 5]
x2 = [1, 2, 3, 4, 5]
y2 = [2, 3, 2, 3, 4]
y3 = [6, 8, 7, 8, 7]

# Markers: https://matplotlib.org/api/markers_api.html
# Each entry is (x values, y values, extra styling passed to plt.scatter).
point_sets = (
    (x1, y1, {}),
    (x2, y2, {'marker': 'v', 'color': 'r'}),
    (x2, y3, {'marker': '^', 'color': 'm'}),
)
for px, py, style in point_sets:
    plt.scatter(px, py, **style)

plt.title('Scatter Plot Example')
plt.show()
import matplotlib.pyplot as plt
# Wedge names, wedge sizes, and one colour per wedge.
labels = ('S1', 'S2', 'S3')
sections = [56, 66, 24]
colors = ['c', 'g', 'y']

# Collect the styling in one place: start at 90 degrees, pull the second
# wedge out slightly, and print each share with two decimals.
pie_options = dict(
    labels=labels,
    colors=colors,
    startangle=90,
    explode=(0, 0.1, 0),
    autopct='%1.2f%%',
)
plt.pie(sections, **pie_options)

plt.axis('equal')  # Try commenting this out.
plt.title('Pie Chart Example')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# 100 noisy samples centred on 200, plotted against their index.
ys = 200 + np.random.randn(100)
x = list(range(len(ys)))

plt.plot(x, ys, '-')
# Shade the area between the curve and the y=195 baseline wherever the curve
# lies above it; alpha makes the fill semi-transparent.
plt.fill_between(x, ys, 195, where=(ys > 195), facecolor='g', alpha=0.6)

plt.title("Fills and Alpha Example")
plt.show()
import matplotlib.pyplot as plt
import numpy as np
def random_plots(n_points=20, high=10):
    """Return random sample data for a quick line plot.

    Args:
        n_points: Number of points to generate (defaults to 20).
        high: Exclusive upper bound for the random y values (defaults to 10).

    Returns:
        A tuple ``(xs, ys)`` of equal-length lists: ``xs`` is
        ``0 .. n_points - 1`` and ``ys`` holds one random integer in
        ``[0, high)`` per point.
    """
    xs = list(range(n_points))
    # One scalar draw per point, in order, so the sequence of random numbers
    # consumed is the same as drawing inside an explicit loop.
    ys = [np.random.randint(high) for _ in xs]
    return xs, ys
fig = plt.figure()

# Lay the axes out on a 5-row by 2-column grid: a one-row banner, a three-row
# main panel, and two half-width panels sharing the last row.
grid_shape = (5, 2)
ax1 = plt.subplot2grid(grid_shape, (0, 0), rowspan=1, colspan=2)
ax2 = plt.subplot2grid(grid_shape, (1, 0), rowspan=3, colspan=2)
ax3 = plt.subplot2grid(grid_shape, (4, 0), rowspan=1, colspan=1)
ax4 = plt.subplot2grid(grid_shape, (4, 1), rowspan=1, colspan=1)

# Give every panel its own freshly drawn random series.
for panel in (ax1, ax2, ax3, ax4):
    x, y = random_plots()
    panel.plot(x, y)

plt.tight_layout()
plt.show()
To customize styling further, please see the Matplotlib docs.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Build and draw a 10 x 10 heatmap of random data.
side_length = 10

# 0..9 as a row vector and as a column vector; adding both makes the values
# grow as we get closer to (9, 9).
row_ramp = np.arange(side_length)
column_ramp = np.reshape(np.arange(side_length), (side_length, 1))

# Values randomized around 5, then tilted by the two ramps.
data = 5 + np.random.randn(side_length, side_length)
data = data + row_ramp + column_ramp

# Generate the heatmap
sns.heatmap(data)
plt.show()
from altair_widgets import interact_with
from vega_datasets import data

# Load the cars dataset here so this cell runs on its own; `source` is not
# defined by any earlier cell.
source = data.cars()
interact_with(source)
import altair as alt
from vega_datasets import data
source = data.cars()

# Interval selection shared by both charts.
brush = alt.selection(type='interval')

# Scatter plot: points inside the selection are coloured by Origin, the rest
# are drawn light gray.
origin_or_gray = alt.condition(brush, 'Origin', alt.value('lightgray'))
point_marks = alt.Chart(source).mark_point()
points = point_marks.encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color=origin_or_gray,
).add_selection(brush)

# Bar chart: count of rows per Origin, filtered by the same selection.
bar_marks = alt.Chart(source).mark_bar()
bars = bar_marks.encode(
    y='Origin',
    color='Origin',
    x='count(Origin)',
).transform_filter(brush)

# Stack the scatter plot above the bar chart.
points & bars
import plotly
plotly.offline.init_notebook_mode(connected=False)
from plotly.graph_objs import *
import pandas as pd
import numpy as np

# Create dataframe with random data: 10 rows of integers in [0, 100),
# one column per letter.
random_values = np.random.randint(0, 100, size=(10, 4))
df = pd.DataFrame(random_values, columns=list('ABCD'))

# Single bar trace: column A along x, column B as bar height.
bar_trace = Bar(x=df.A, y=df.B)
data = [bar_trace]
plotly.offline.iplot(data)
import plotly
plotly.offline.init_notebook_mode()
import numpy as np
from plotly.offline import iplot
from plotly.graph_objs import *

# Two independent sets of 2000 normally distributed samples.
x = np.random.randn(2000)
y = np.random.randn(2000)

# Density contours of the samples, coloured as a heatmap.
density_trace = Histogram2dContour(
    x=x,
    y=y,
    contours=histogram2dcontour.Contours(coloring='heatmap'),
)
# The raw samples as small, mostly transparent white markers.
samples_trace = Scatter(
    x=x,
    y=y,
    mode='markers',
    marker=scatter.Marker(color='white', size=3, opacity=0.3),
)
iplot([density_trace, samples_trace], show_link=False)
import pandas as pd
import numpy as np
import holoviews as hv
hv.extension('plotly')

# Load sample dataset
from sklearn.datasets import load_iris
iris = load_iris()
# Feature columns followed by the target, stored as a 'species' column.
iris_columns = list(iris['feature_names']) + ['species']
iris_values = np.column_stack((iris['data'], iris['target']))
iris_df = pd.DataFrame(data=iris_values, columns=iris_columns)

# Declaring Data
from holoviews.operation import gridmatrix
ds = hv.Dataset(iris_df)
# One overlay layer per species value.
grouped_by_species = ds.groupby('species', container_type=hv.NdOverlay)
grid = gridmatrix(grouped_by_species, diagonal_type=hv.Scatter)

# Plot
scatter_style = dict(bgcolor='#efe8e2', size=4)
grid.options('Scatter', **scatter_style)
import numpy as np
from bqplot import pyplot as plt
plt.figure(1, title='Line Chart')

# Fixed seed so the same curve is produced on every run.
np.random.seed(0)
n = 200
x = np.linspace(0.0, 10.0, num=n)
# Running total of n normally distributed steps.
steps = np.random.randn(n)
y = np.cumsum(steps)

plt.plot(x, y)
plt.show()
import umap
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
digits = load_digits()
# Fit UMAP with its default settings and project the digits data.
reducer = umap.UMAP()
embedding = reducer.fit_transform(digits.data)
embedding
# Plot the first two embedding columns, coloured by digit label.
first_axis = embedding[:, 0]
second_axis = embedding[:, 1]
plt.scatter(first_axis, second_axis, c=digits.target, cmap='Spectral', s=5)
plt.gca().set_aspect('equal', 'datalim')
# Colour bar with boundaries at -0.5, 0.5, ..., 9.5 and ticks on 0..9.
digit_colorbar = plt.colorbar(boundaries=np.arange(11) - 0.5)
digit_colorbar.set_ticks(np.arange(10))
plt.title('UMAP projection of the Digits dataset', fontsize=24);
pandas-profiling creates HTML profiling reports from pandas DataFrame objects.
# Load sample dataset
import pandas as pd
# CSV file fetched directly from its URL.
meteorite_csv_url = 'https://data.nasa.gov/api/views/gh4g-9sfh/rows.csv'
meteorite_df = pd.read_csv(meteorite_csv_url)
# Preview the first rows.
meteorite_df.head()
# Load pandas_profiling
import pandas_profiling
# Some warnings are irrelevant here; filter them out.
import warnings
warnings.filterwarnings(action="ignore")
# Generate report for dataframe
profile_report = pandas_profiling.ProfileReport(meteorite_df)
profile_report
# Load missingno
import missingno as msno
# IPython magic: render Matplotlib figures inline in the notebook output.
%matplotlib inline
# Load sample dataset
import pandas as pd
# Read the CSV straight from its URL into a DataFrame.
meteorite_df = pd.read_csv('https://data.nasa.gov/api/views/gh4g-9sfh/rows.csv')
# Preview the first rows of the DataFrame.
meteorite_df.head()
The msno.matrix nullity matrix is a data-dense display which lets you quickly visually pick out patterns in data completion.
# Draw the nullity matrix for the meteorite DataFrame.
msno.matrix(meteorite_df)
The msno.heatmap measures nullity correlation: how strongly the presence or absence of one variable affects the presence of another.
# Draw the nullity correlation heatmap for the meteorite DataFrame.
msno.heatmap(meteorite_df)
The msno.bar is a simple visualization of nullity by column:
# Draw the per-column nullity bar chart for the meteorite DataFrame.
msno.bar(meteorite_df)
The msno.dendrogram allows you to more fully correlate variable completion, revealing trends deeper than the pairwise ones visible in the correlation heatmap.
# Draw the nullity dendrogram for the meteorite DataFrame.
msno.dendrogram(meteorite_df)
# Load SHAP
import shap
shap.initjs()  # load JS visualization code to notebook

# train XGBoost model
import xgboost
X, y = shap.datasets.boston()
training_matrix = xgboost.DMatrix(X, label=y)
booster_params = {"learning_rate": 0.01}
model = xgboost.train(booster_params, training_matrix, num_boost_round=100)

# explain the model's predictions using SHAP values
# (same syntax works for LightGBM, CatBoost, and scikit-learn models)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# visualize the first prediction's explanation
first_row = 0
shap.force_plot(explainer.expected_value, shap_values[first_row], X.iloc[first_row])

# visualize the training set predictions
shap.force_plot(explainer.expected_value, shap_values, X)

# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
shap.dependence_plot("RM", shap_values, X)

# summarize the effects of all the features, first with the default plot and
# then as a bar chart
shap.summary_plot(shap_values, X)
shap.summary_plot(shap_values, X, plot_type="bar")
import numpy as np
from PIL import Image
def display_image(x):
    """Return *x* as a PIL image, min-max scaled to the 0..255 range.

    Args:
        x: Array-like of numbers; its minimum maps to 0 and its maximum
            to 255.

    Returns:
        The ``PIL.Image`` built from the scaled ``uint8`` array. A constant
        input (max equals min) yields an all-zero image.
    """
    values = np.asarray(x)
    # Integer inputs (e.g. uint8) would wrap around in `255 * (x - min)`,
    # so do the arithmetic in floating point. Float inputs keep their dtype.
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype(np.float64)

    lowest = values.min()
    value_range = values.max() - lowest
    if value_range == 0:
        # Constant input: avoid the 0/0 division and return a black image.
        x_scaled = np.zeros(values.shape, dtype=np.uint8)
    else:
        x_scaled = np.uint8(255 * (values - lowest) / value_range)
    return Image.fromarray(x_scaled)
# Show a 200 x 200 image of uniform random noise.
noise = np.random.random_sample((200, 200))
display_image(noise)